Warning: this document contains 2 failing validations.
Check that all the directories for the .nc files got made
# Directory names found under data_raw/CMIP6.
source_dl <- dir(here("data_raw", "CMIP6"))
# Expected names: unique source IDs, lower-cased, with "-" replaced by "_".
source_id <- str_replace_all(str_to_lower(unique(idx$source_id)), "-", "_")
# Halt unless every expected source name has a matching directory.
stop_if_not(all(source_id %in% source_dl))
Check that all the corresponding .csv files exist
# File names found in the data directory.
csvs <- list.files(here("data"))
# Halt unless each source has a "<source_id>_data.csv" file.
stop_if_not(all(paste0(source_id, "_data.csv") %in% csvs))
Perform necessary calculations to compare PET and SPEI among models and between models and observed.
For PET, I’m using the “energy-only” method proposed by Milly and Dunne (2016) eq. 8:
\[ PET = 0.8(R_n - G) \]
Except that in their notes, they estimate \(R_n - G\) as hfls + hfss after converting to units of mm/day using the latent heat of vaporization of water, given by their eq. 2:
\[ L_v(T) = 2.501 - 0.002361T \] in MJ/kg
For the observed data and the CMIP6 data from the same period, I calculate 3-month SPEI using precipitation and PET. I then use SPEI to categorize months as experiencing mild, moderate, severe, extreme, or no drought. I compare the frequencies between observed and CMIP6 data with a Chi-squared goodness-of-fit test (p < 0.05 = dissimilar frequencies).
| Comparison of observed data to CMIP6 'historical' output | |||||||||
|---|---|---|---|---|---|---|---|---|---|
| Data only from 1980 to 2015 to match observed. Results under 'Seasonality' refer to means for each month. Results under 'Historical Drought Freq' use SPEI3 to categorize drought into no drought (blue), mild (yellow), moderate (orange), severe (dark orange), and extreme (red) | |||||||||
| Source | Mean annual pr (∆obs) [mm/yr]3 | Seasonality1 | Historical Drought Freq.2 | ||||||
| precip | temp | pr | tas | tasmin | tasmax | drought freq. | Chi-squared | ||
| observed4 | 2341 (0) | 1.00 | 1.00 | 1.00 | 1.00 | 1.000 | |||
| access_cm2 | 1606 (-735) | 0.03 | −0.16 | 0.34 | 0.03 | 0.380 | |||
| access_esm1_5 | 2179 (-162) | 0.69 | 0.43 | 0.63 | 0.50 | 0.088 | |||
| awi_cm_1_1_mr | 1899 (-442) | 0.80 | 0.49 | 0.65 | 0.53 | 0.068 | |||
| bcc_csm2_mr | 887 (-1454) | 0.17 | 0.38 | 0.62 | 0.43 | 0.131 | |||
| canesm5 | 1211 (-1130) | 0.43 | 0.10 | 0.50 | 0.17 | 0.034 | |||
| ciesm | 3 (-2338) | 0.95 | 0.73 | 0.62 | 0.48 | 0.324 | |||
| cmcc_esm2 | 1823 (-518) | 0.73 | 0.46 | 0.72 | 0.50 | 0.056 | |||
| ec_earth3 | 1809 (-532) | 0.31 | 0.06 | 0.53 | 0.15 | 0.209 | |||
| ec_earth3_veg_lr | 1789 (-552) | 0.27 | 0.04 | 0.45 | 0.18 | 0.859 | |||
| fgoals_g3 | 918 (-1423) | 0.92 | 0.62 | 0.48 | 0.76 | 0.636 | |||
| fio_esm_2_0 | 2244 (-97) | 0.90 | 0.68 | 0.57 | 0.75 | 0.316 | |||
| gfdl_esm4 | 1782 (-559) | 0.78 | 0.49 | 0.85 | 0.52 | 0.362 | |||
| inm_cm4_8 | 2564 (223) | 0.87 | 0.15 | 0.43 | 0.50 | 0.283 | |||
| inm_cm5_0 | 2721 (380) | 0.92 | 0.03 | 0.27 | 0.50 | 0.174 | |||
| ipsl_cm6a_lr | 2149 (-192) | 0.90 | 0.69 | 0.55 | 0.69 | 0.991 | |||
| miroc6 | 1982 (-359) | 0.80 | 0.51 | 0.71 | 0.41 | 0.840 | |||
| mpi_esm1_2_lr | 1551 (-790) | 0.66 | 0.29 | 0.40 | 0.34 | 0.222 | |||
| mri_esm2_0 | 2967 (626) | 0.83 | 0.24 | 0.52 | 0.57 | 0.001 | |||
| nesm3 | 1643 (-698) | −0.06 | −0.27 | 0.17 | −0.27 | 0.168 | |||
|
1
Spearman's rho. Rho < 0.45 highlighted in red.
2
p-value from Chi-squared goodness-of-fit test comparing frequency of categories of drought to observed. A smaller p-value means more dissimilar frequencies.
3
Red indicates mean annual precipitation not within ± 20% of observed
4
Observed data from Xavier et al. (2016)
|
|||||||||
Below are validation reports and plots of all data downloaded from each CMIP6 source.
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
## Error: The `col_vals_expr()` validation failed beyond the absolute threshold level (1).
## * failure level (5076) >= failure threshold (1)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
## Error: Exceedance of failed test units where values in `tasmax` should have been between `10` and `55`.
## The `col_vals_between()` validation failed beyond the absolute threshold level (1).
## * failure level (225) >= failure threshold (1)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)
# All six climate variables must be present as columns.
col_exists(
  x = df_list[[i]],
  columns = c("hfls", "hfss", "tas", "tasmin", "tasmax", "pr"),
  label = "all variables exist"
)
# Check experiment_id against the set of expected scenarios.
col_vals_make_set(
  x = df_list[[i]],
  columns = experiment_id,
  set = c("historical", "ssp126", "ssp245", "ssp585"),
  label = "all scenarios exist"
)
# None of the climate variables may contain missing values.
col_vals_not_null(
  x = df_list[[i]],
  columns = c(hfls, hfss, tas, tasmin, tasmax, pr),
  label = "no missing values"
)
# The maximum of pr must exceed 200 mm/month.
col_vals_expr(
  x = df_list[[i]],
  expr = ~max(.$pr) > as_units(200, "mm/month"),
  label = "precipitation is reasonable"
)
# After converting the tas* columns to numeric, values must be in [10, 55].
col_vals_between(
  x = df_list[[i]],
  columns = c(tas, tasmin, tasmax),
  left = 10,
  right = 55,
  label = "temperature is reasonable",
  preconditions = ~. %>%
    dplyr::mutate(across(starts_with("tas"), as.numeric))
)